home *** CD-ROM | disk | FTP | other *** search
- #
- # zurllib.py
- #
- # This is (hopefully) a drop-in for urllib which will request gzip/deflate
- # compression and then decompress the output if a compressed response is
- # received while maintaining the API.
- #
- # by Robert Stone 2/22/2003
- #
-
- from urllib import *
- from urllib2 import *
- from gzip import GzipFile
- from StringIO import StringIO
- from __init__ import version
- import pprint
- import config
- import prefs
-
-
# Debug switch: set to a true value to print the outgoing request headers
# and the received response headers.
DEBUG = 0
-
-
class HTTPContentEncodingHandler(HTTPHandler):
    """Inherit and add gzip/deflate/etc support to HTTP gets.

    Every request opened through this handler advertises
    ``Accept-Encoding: gzip`` and carries the application's User-Agent
    string.  The response is wrapped in ``addinfourldecompress`` so callers
    read the decoded body.
    """

    def http_open(self, req):
        """Open ``req`` over HTTP and return a decompressing response.

        ``req`` is modified in place: the ``Accept-Encoding`` and
        ``User-Agent`` headers are added to it before it is sent.

        Returns an ``addinfourldecompress`` wrapping the response produced
        by ``HTTPHandler.http_open``.  If building that wrapper fails, the
        underlying response is closed and the exception is re-raised.
        """
        # Add the Accept-Encoding header to the request.
        # Support gzip encoding (identity is assumed).
        req.add_header("Accept-Encoding", "gzip")
        # Added correct capitalization and Democracy info to string --NN
        user_agent = 'BitTorrent/%s %s/%s (%s)' % (
            version,
            config.get(prefs.SHORT_APP_NAME),
            config.get(prefs.APP_VERSION),
            config.get(prefs.PROJECT_URL))
        req.add_header('User-Agent', user_agent)

        if DEBUG:
            print("Sending:")
            print(req.headers)
            print("\n")

        fp = HTTPHandler.http_open(self, req)
        headers = fp.headers
        if DEBUG:
            pprint.pprint(headers.dict)

        try:
            resp = addinfourldecompress(fp, headers, fp.url)
        except Exception:
            # Wrapping a gzip response reads and inflates the body up front;
            # if that fails, do not leave the raw response open.
            fp.close()
            raise

        # As of Python 2.4 http_open response also has 'code' and 'msg'
        # members, and HTTPErrorProcessor breaks if they don't exist.
        # Copy them only when present so older responses still work.
        for attr in ('code', 'msg'):
            if hasattr(fp, attr):
                setattr(resp, attr, getattr(fp, attr))
        return resp
-
class addinfourldecompress(addinfourl):
    """Do gzip decompression if necessary. Do addinfourl stuff too.

    Behaves like ``addinfourl`` except that, when the response headers
    declare a gzip content encoding, the file object handed to the base
    class is a ``GzipStream`` that yields the decompressed body.
    """

    def __init__(self, fp, headers, url):
        """Wrap ``fp``, decompressing it when ``headers`` say it is gzip.

        ``fp`` is the raw response file object, ``headers`` the response
        header mapping and ``url`` the URL that was fetched.
        """
        # We need to do something more sophisticated here to deal with
        # multiple values?  What about other weird crap like q-values?
        # Basically this only works for the most simplistic case and will
        # break in some other cases, but for now we only care about making
        # this work with the BT tracker so....
        #
        # Content-coding names are case-insensitive and "x-gzip" is a
        # legacy alias for "gzip", so normalize before comparing.
        encoding = (headers.get('content-encoding') or '').strip().lower()
        if encoding in ('gzip', 'x-gzip'):
            if DEBUG:
                print("Contents of Content-encoding: " + headers['Content-encoding'] + "\n")
            self.gzip = 1
            # Keep the raw object so close() can release it as well.
            self.rawfp = fp
            fp = GzipStream(fp)
        else:
            self.gzip = 0
        addinfourl.__init__(self, fp, headers, url)

    def close(self):
        """Close the (possibly decompressing) stream and the raw response."""
        self.fp.close()
        if self.gzip:
            self.rawfp.close()

    def iscompressed(self):
        """Return a true value if the response body was gzip-encoded."""
        return self.gzip
-
class GzipStream(StringIO):
    """Magically decompress a file object.

    This is not the most efficient way to do this but GzipFile() wants
    to seek, etc, which won't work for a stream such as that from a socket.
    So we copy the whole shebang into a StringIO object, decompress that
    then let people access the decompressed output as a StringIO object.

    The disadvantage is memory use and the advantage is random access.

    Will mess with fixing this later.
    """

    def __init__(self, fp):
        """Read all of ``fp``, gunzip it and expose the result as a StringIO.

        ``fp`` is kept on ``self.fp`` so that close() can close it too.
        Whatever GzipFile raises for data that is not valid gzip propagates
        to the caller; the temporary buffers are closed either way.
        """
        self.fp = fp

        # This is nasty and needs to be fixed at some point:
        # copy everything into a StringIO (compressed).
        compressed = StringIO()
        try:
            # Keep reading until the source reports end-of-data, in case a
            # single read() does not return everything.
            chunk = fp.read()
            while chunk:
                compressed.write(chunk)
                chunk = fp.read()

            # Now unzip (gz) the StringIO to a string.  A size-less read()
            # on a GzipFile returns the entire decompressed content.
            compressed.seek(0, 0)
            gz = GzipFile(fileobj=compressed)
            try:
                data = gz.read()
            finally:
                gz.close()
        finally:
            compressed.close()

        # Init our StringIO selves with the decompressed data.
        StringIO.__init__(self, data)

    def close(self):
        """Close the wrapped source file object, then this buffer."""
        self.fp.close()
        return StringIO.close(self)
-
-
def test():
    """Test this module.

    Fetches two hard-coded URLs with urlopen(), prints each body and
    reports whether the response says it was compressed.  Needs network
    access; nothing is asserted.

    At the moment this is lame.
    """

    print("Running unit tests.\n")

    def printcomp(fp):
        # Report whether the response object says it was gzip-compressed.
        try:
            compressed = fp.iscompressed()
        except AttributeError:
            print("no iscompressed function! this shouldn't happen")
            return
        if compressed:
            print("GET was compressed.\n")
        else:
            print("GET was uncompressed.\n")

    def fetch(banner, url):
        # Announce, fetch and dump one URL; always close the response.
        print(banner)
        fp = urlopen(url)
        try:
            print(fp.read())
            printcomp(fp)
        finally:
            fp.close()

    fetch("Trying to GET a compressed document...\n",
          'http://a.scarywater.net/hng/index.shtml')
    fetch("Trying to GET an unknown document...\n",
          'http://www.otaku.org/')
-
-
#
# Build an opener around the HTTPContentEncodingHandler defined above and
# install it.  This runs when the module is imported.
#
_gzip_opener = build_opener(HTTPContentEncodingHandler)
install_opener(_gzip_opener)

# Run the manual network check when executed as a script.
if __name__ == '__main__':
    test()
-
-